rm(list = ls())
library(data.table)
library(plyr)
library(tidyr)
library(lfe)
library(stargazer)
library(xtable)
library(sandwich)
library(roll)
library(readxl)
library(readr)
library(zoo)
library(texreg)
library(DescTools)
library(ggplot2)

m <- data.table(read_xlsx("../Data/MasterData.xlsx", skip = 1, guess_max = 1e4))
m[, age := 2021 - year]

## merge in SH daily index
sh <- data.table(read_xlsx("../Data/SHIndex.xlsx", guess_max = 1e4))
sh <- sh[, .(Idxtrd01, Idxtrd05)]
names(sh) <- c("date", "SHindex")
sh <- sh[-1]
sh[, date := as.Date(date)]
setorder(sh, date)

sh[, ym := as.yearmon(date)]
sh <- sh[, .(SHindex = SHindex[.N]), by = .(ym)]
sh[, SHindex := as.numeric(SHindex)]
sh[, SHindex := SHindex]

m[, memrableperiod_begin := as.yearmon(memrableperiod_begin, "%Ym%m")]
m[, memrableperiod_end := as.yearmon(memrableperiod_end, "%Ym%m")]

###########################################################################
# Distribution of recalled episodes
###########################################################################

# Age
m_age1 <- m[age >= 35]

tmp <- table(m_age1[type == 0]$memrableperiod_begin) * 10
C <- data.table(tmp)
names(C) <- c("ym", "count")
C[, ym := as.yearmon(ym)]
setorder(C, ym)
C <- merge(C, sh, by = c("ym"))

max(C$count, na.rm = T)
ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-datestart_old.pdf", width = 6, height = 4)


tmp <- table(m_age1[type == 0]$memrableperiod_end) * 10
C <- data.table(tmp)
names(C) <- c("ym", "count")
C[, ym := as.yearmon(ym)]
setorder(C, ym)
C <- merge(C, sh, by = c("ym"))

max(C$count, na.rm = T)
ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-dateend_old.pdf", width = 6, height = 4)


m_age2 <- m[age < 35]

tmp <- table(m_age2$memrableperiod_begin) * 10
C <- data.table(tmp)
names(C) <- c("ym", "count")
C[, ym := as.yearmon(ym)]
setorder(C, ym)
C <- merge(C, sh, by = c("ym"))

ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-datestart_young.pdf", width = 6, height = 4)

tmp <- table(m_age2$memrableperiod_end) * 10
C <- data.table(tmp)
names(C) <- c("ym", "count")
C[, ym := as.yearmon(ym)]
setorder(C, ym)
C <- merge(C, sh, by = c("ym"))

ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-dateend_young.pdf", width = 6, height = 4)

################################################################
# Appendix: distribution of survey date and time
################################################################
m[, survey.time := substr(begin_time, 12, 12 + 4)]
m[, survey.hour := as.numeric(substr(survey.time, 1, 2))]

# histogram of survey hour
ggplot(m, aes(x = survey.hour, width = 1000)) +
  geom_bar(color = "black", fill = "white") +
  ggtitle("Hour") +
  xlab("") + theme_minimal()
ggsave("../Figures/pic-binscatter-surveyhour.pdf", width = 6, height = 4)

# convert survey date to date format
m[, survey.date := as.Date(date, "%d%b%Y")]

# histogram of survey date
ggplot(m, aes(x = survey.date)) +
  geom_bar(color = "black", fill = "white") +
  ggtitle("Date") +
  xlab("") + theme_minimal()
ggsave("../Figures/pic-binscatter-surveydate.pdf", width = 6, height = 4)
